# Any number can be chosen
set.seed(3141596)
otu_table input for
phyloseq).Output that we need:
otu_tabletax_tablesample_data - track the reads lost
throughout the DADA2 workflow# Efficient package loading with pacman
pacman::p_load(tidyverse, BiocManager, devtools, dada2,
phyloseq, patchwork, DT, iNEXT, vegan,
install = FALSE)
# Set the raw fastq path to the raw sequencing files
# Path to the fastq files
raw_fastqs_path <- "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/01_raw_gzipped_fastqs"
raw_fastqs_path
## [1] "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/01_raw_gzipped_fastqs"
# What files are in this path? Intuition Check
head(list.files(raw_fastqs_path))
## [1] "ERR11588428_1.fastq.gz" "ERR11588428_2.fastq.gz" "ERR11588429_1.fastq.gz"
## [4] "ERR11588429_2.fastq.gz" "ERR11588430_1.fastq.gz" "ERR11588430_2.fastq.gz"
# How many files are there?
str(list.files(raw_fastqs_path))
## chr [1:20] "ERR11588428_1.fastq.gz" "ERR11588428_2.fastq.gz" ...
# Create vector of forward reads
forward_reads <- list.files(raw_fastqs_path, pattern = "1.fastq.gz", full.names = TRUE)
# Intuition Check
head(forward_reads)
## [1] "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/01_raw_gzipped_fastqs/ERR11588428_1.fastq.gz"
## [2] "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/01_raw_gzipped_fastqs/ERR11588429_1.fastq.gz"
## [3] "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/01_raw_gzipped_fastqs/ERR11588430_1.fastq.gz"
## [4] "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/01_raw_gzipped_fastqs/ERR11588431_1.fastq.gz"
## [5] "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/01_raw_gzipped_fastqs/ERR11588432_1.fastq.gz"
## [6] "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/01_raw_gzipped_fastqs/ERR11588433_1.fastq.gz"
# Create a vector of reverse reads
reverse_reads <- list.files(raw_fastqs_path, pattern = "2.fastq.gz", full.names = TRUE)
head(reverse_reads)
## [1] "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/01_raw_gzipped_fastqs/ERR11588428_2.fastq.gz"
## [2] "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/01_raw_gzipped_fastqs/ERR11588429_2.fastq.gz"
## [3] "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/01_raw_gzipped_fastqs/ERR11588430_2.fastq.gz"
## [4] "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/01_raw_gzipped_fastqs/ERR11588431_2.fastq.gz"
## [5] "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/01_raw_gzipped_fastqs/ERR11588432_2.fastq.gz"
## [6] "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/01_raw_gzipped_fastqs/ERR11588433_2.fastq.gz"
Let’s see the quality of the raw reads before we trim
# Randomly select 2 samples from dataset to evaluate
random_samples <- sample(1:length(reverse_reads), size = 2)
random_samples
## [1] 9 4
# Calculate and plot quality of these two samples
forward_filteredQual_plot_2 <- plotQualityProfile(forward_reads[random_samples]) +
labs(title = "Forward Read: Raw Quality")
# Show the plot
forward_filteredQual_plot_2
reverse_filteredQual_plot_2 <- plotQualityProfile(reverse_reads[random_samples]) +
labs(title = "Reverse Read: Raw Quality")
# Show the plot
reverse_filteredQual_plot_2
# Plot them together with patchwork
forward_filteredQual_plot_2 + reverse_filteredQual_plot_2
# When I try to run this I get the following error: Error in Ops.data.frame(guide_loc, panel_loc) : ‘==’ only defined for equally-sized data frames
# Aggregate all QC plots
# Forward reads
forward_preQC_plot <-
plotQualityProfile(forward_reads, aggregate = TRUE) +
labs(title = "Forward Pre-QC")
# Show the plot
forward_preQC_plot
# reverse reads
reverse_preQC_plot <-
plotQualityProfile(reverse_reads, aggregate = TRUE) +
labs(title = "Reverse Pre-QC")
# Show the plot
reverse_preQC_plot
preQC_aggregate_plot <- forward_preQC_plot + reverse_preQC_plot
# Plot the forward and reverse together
# Show the plot
preQC_aggregate_plot
Here, we see that the plots are showing the standard Illumina output: The quality is higher at the beginning of the read and slowly gets worse and worse as the read progresses. This is typical of Illumina sequencing because of phasing. The forward reads actually look a bit more low quality than the reverse reads.
# vector of our samples, extract sample name from files
samples <- sapply(strsplit(basename(forward_reads), "_"), `[`,1)
# Intuition Check
head(samples)
## [1] "ERR11588428" "ERR11588429" "ERR11588430" "ERR11588431" "ERR11588432"
## [6] "ERR11588433"
# Place filtered reads into filtered_fastqs_path
filtered_fastqs_path <- "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/02_filtered_fastqs"
filtered_fastqs_path
## [1] "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/02_filtered_fastqs"
# create 2 variables: filtered_F, filtered_R
filtered_forward_reads <-
file.path(filtered_fastqs_path, paste0(samples, "_R1_filtered.fastq.gz"))
length(filtered_forward_reads)
## [1] 10
head(filtered_forward_reads)
## [1] "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/02_filtered_fastqs/ERR11588428_R1_filtered.fastq.gz"
## [2] "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/02_filtered_fastqs/ERR11588429_R1_filtered.fastq.gz"
## [3] "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/02_filtered_fastqs/ERR11588430_R1_filtered.fastq.gz"
## [4] "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/02_filtered_fastqs/ERR11588431_R1_filtered.fastq.gz"
## [5] "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/02_filtered_fastqs/ERR11588432_R1_filtered.fastq.gz"
## [6] "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/02_filtered_fastqs/ERR11588433_R1_filtered.fastq.gz"
# reverse reads
filtered_reverse_reads <-
file.path(filtered_fastqs_path, paste0(samples, "_R2_filtered.fastq.gz"))
length(filtered_reverse_reads)
## [1] 10
head(filtered_reverse_reads)
## [1] "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/02_filtered_fastqs/ERR11588428_R2_filtered.fastq.gz"
## [2] "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/02_filtered_fastqs/ERR11588429_R2_filtered.fastq.gz"
## [3] "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/02_filtered_fastqs/ERR11588430_R2_filtered.fastq.gz"
## [4] "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/02_filtered_fastqs/ERR11588431_R2_filtered.fastq.gz"
## [5] "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/02_filtered_fastqs/ERR11588432_R2_filtered.fastq.gz"
## [6] "/local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/data/01_DADA2/02_filtered_fastqs/ERR11588433_R2_filtered.fastq.gz"
Parameters of filter and trim DEPEND ON THE DATASET.
The things to keep in mind are:
- The library preparation: Are the primers included in the sequence?
If so, they need to be trimmed out in this step.
- What do the above quality profiles of the reads look like? If they
are lower quality, it is highly recommended to use
maxEE = c(1,1).
- Do the reads dip suddenly in their quality? If so, explore
trimLeft and truncLen
Check out more of the parameters using ?filterAndTrim to
bring up the help page and do some googling about it. Some notes on two
examples are below, with a description of a few of the parameters:
Moonmilk Dataset: This dataset was generated with the library
preparation described by Theodorescu et al.,
2023 Microbial Ecology, the reads maintained high Phred Scores
(above 30, even more typically above ~34) until aroud 200bps. Therefore,
we will use a stringent maxEE = c(1,1) and truncate the
reads at 250bps.
maxEE is a quality filtering threshold applied to
expected errors. Here, if there’s 2 expected errors. It’s ok. But more
than 2. Throw away the sequence. Two values, first is for forward reads;
second is for reverse reads.trimLeft can be used to remove the beginning bases of a
read (e.g. to trim out primers!)truncLen can be used to trim your sequences after a
specific base pair when the quality gets lower. Though, please note that
this will shorten the ASVs! For example, this can be used when the
quality of the sequence suddenly gets lower, or clearly is typically
lower. So, if the quality of the read drops below a phred score of 25
(on the y-axis of the plotQualityProfile above, which indicates ~99.5%
confidence per base).maxN the number of N bases. Here, using ASVs, we should
ALWAYS remove all Ns from the data.# Assign a vector to filtered reads
# trim out poor bases, first 3 bps on F reads
# write out filtered fastq files
filtered_reads <-
filterAndTrim(fwd = forward_reads, filt = filtered_forward_reads,
rev = reverse_reads, filt.rev = filtered_reverse_reads,
maxN = 0, maxEE = c(1,1),
truncLen = 250, rm.phix = TRUE, compress = TRUE, multithread = TRUE)
# Plot the 12 random samples after QC
forward_filteredQual_plot_12 <-
plotQualityProfile(filtered_forward_reads[random_samples]) +
labs(title = "Trimmed Forward Read Quality")
# Show the plot
forward_filteredQual_plot_12
reverse_filteredQual_plot_12 <-
plotQualityProfile(filtered_reverse_reads[random_samples]) +
labs(title = "Trimmed Reverse Read Quality")
# Show the plot
reverse_filteredQual_plot_12
# Put the two plots together
forward_filteredQual_plot_12 + reverse_filteredQual_plot_12
# Aggregate all QC plots
# Forward reads
forward_postQC_plot <-
plotQualityProfile(filtered_forward_reads, aggregate = TRUE) +
labs(title = "Forward Post-QC")
# Show the plot
forward_postQC_plot
# reverse reads
reverse_postQC_plot <-
plotQualityProfile(filtered_reverse_reads, aggregate = TRUE) +
labs(title = "Reverse Post-QC")
# Show the plot
reverse_postQC_plot
postQC_aggregate_plot <- forward_postQC_plot + reverse_postQC_plot
# Show the plot
postQC_aggregate_plot
The quality of the sequences look much better. It’s even more obvious that the reverse reads are higher quality for some reason, but even so the quality of the forward reads is fine.
filterAndTrim# Make output into dataframe
filtered_df <- as.data.frame(filtered_reads)
head(filtered_df)
## reads.in reads.out
## ERR11588428_1.fastq.gz 96845 58385
## ERR11588429_1.fastq.gz 101519 58720
## ERR11588430_1.fastq.gz 95676 57401
## ERR11588431_1.fastq.gz 90880 55342
## ERR11588432_1.fastq.gz 85608 53582
## ERR11588433_1.fastq.gz 73835 45215
# calculate some stats
filtered_df %>%
reframe(median_reads_in = median(reads.in),
median_reads_out = median(reads.out),
median_percent_retained = (median(reads.out)/median(reads.in)))
## median_reads_in median_reads_out median_percent_retained
## 1 84523.5 49398.5 0.5844351
we retained 58.4% of our reads. This should be “enough”. I think our
filterAndTrim() parameters are alright for this
situations.
# Plot the pre and post together in one plot
preQC_aggregate_plot / postQC_aggregate_plot
# Forward reads
error_forward_reads <-
learnErrors(filtered_forward_reads, multithread = TRUE)
## 101085500 total bases in 404342 reads from 8 samples will be used for learning the error rates.
# Plot Forward
forward_error_plot <-
plotErrors(error_forward_reads, nominalQ = TRUE) +
labs(title = "Forward Read Error Model")
# Show the plot
forward_error_plot
## Warning in scale_y_log10(): log-10 transformation introduced infinite values.
# Reverse reads
error_reverse_reads <-
learnErrors(filtered_reverse_reads, multithread = TRUE)
## 101085500 total bases in 404342 reads from 8 samples will be used for learning the error rates.
# Plot reverse
reverse_error_plot <-
plotErrors(error_reverse_reads, nominalQ = TRUE) +
labs(title = "Reverse Read Error Model")
# Show the plot
reverse_error_plot
## Warning in scale_y_log10(): log-10 transformation introduced infinite values.
# Put the two plots together
forward_error_plot + reverse_error_plot
## Warning in scale_y_log10(): log-10 transformation introduced infinite values.
## log-10 transformation introduced infinite values.
Details of the plot: - Points: The observed error
rates for each consensus quality score.
- Black line: Estimated error rates after convergence
of the machine-learning algorithm.
- Red line: The error rates expected under the nominal
definition of the Q-score.
Similar to what is mentioned in the dada2 tutorial: the estimated error rates (black line) are a “reasonably good” fit to the observed rates (points), and the error rates drop slowly with increased quality as expected. We can now infer ASVs!
An important note: This process occurs separately on forward and reverse reads! This is quite a different approach from how OTUs are identified in Mothur and also from UCHIME, oligotyping, and other OTU, MED, and ASV approaches.
# Infer ASVs on the forward sequences
dada_forward <- dada(filtered_forward_reads,
err = error_forward_reads,
multithread = TRUE)
## Sample 1 - 58385 reads in 27900 unique sequences.
## Sample 2 - 58720 reads in 30013 unique sequences.
## Sample 3 - 57401 reads in 28695 unique sequences.
## Sample 4 - 55342 reads in 7183 unique sequences.
## Sample 5 - 53582 reads in 7981 unique sequences.
## Sample 6 - 45215 reads in 7895 unique sequences.
## Sample 7 - 36726 reads in 11732 unique sequences.
## Sample 8 - 38971 reads in 12236 unique sequences.
## Sample 9 - 35047 reads in 13799 unique sequences.
## Sample 10 - 33571 reads in 6950 unique sequences.
typeof(dada_forward)
## [1] "list"
# Grab a sample and look at it
dada_forward$`ERR11588428_R1_filtered.fastq.gz`
## dada-class: object describing DADA2 denoising results
## 1113 sequence variants were inferred from 27900 input unique sequences.
## Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 16
# Infer ASVs on the reverse sequences
dada_reverse <- dada(filtered_reverse_reads,
err = error_reverse_reads,
multithread = TRUE)
## Sample 1 - 58385 reads in 24541 unique sequences.
## Sample 2 - 58720 reads in 26623 unique sequences.
## Sample 3 - 57401 reads in 25829 unique sequences.
## Sample 4 - 55342 reads in 9593 unique sequences.
## Sample 5 - 53582 reads in 9543 unique sequences.
## Sample 6 - 45215 reads in 5843 unique sequences.
## Sample 7 - 36726 reads in 8137 unique sequences.
## Sample 8 - 38971 reads in 8598 unique sequences.
## Sample 9 - 35047 reads in 15273 unique sequences.
## Sample 10 - 33571 reads in 9235 unique sequences.
# Inspect
dada_reverse[1]
## $ERR11588428_R2_filtered.fastq.gz
## dada-class: object describing DADA2 denoising results
## 1124 sequence variants were inferred from 24541 input unique sequences.
## Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 16
dada_reverse[10]
## $ERR11588437_R2_filtered.fastq.gz
## dada-class: object describing DADA2 denoising results
## 475 sequence variants were inferred from 9235 input unique sequences.
## Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 16
Now, merge the forward and reverse ASVs into contigs.
# merge forward and reverse ASVs
merged_ASVs <- mergePairs(dada_forward, filtered_forward_reads,
dada_reverse, filtered_reverse_reads,
verbose = TRUE)
## 40228 paired-reads (in 3022 unique pairings) successfully merged out of 50530 (in 11364 pairings) input.
## 37668 paired-reads (in 2880 unique pairings) successfully merged out of 49375 (in 12412 pairings) input.
## 37461 paired-reads (in 2908 unique pairings) successfully merged out of 48706 (in 12020 pairings) input.
## 53845 paired-reads (in 1527 unique pairings) successfully merged out of 54153 (in 1634 pairings) input.
## 52115 paired-reads (in 1726 unique pairings) successfully merged out of 52404 (in 1823 pairings) input.
## 43966 paired-reads (in 1061 unique pairings) successfully merged out of 44432 (in 1144 pairings) input.
## 34647 paired-reads (in 1583 unique pairings) successfully merged out of 35319 (in 1837 pairings) input.
## 36933 paired-reads (in 1550 unique pairings) successfully merged out of 37498 (in 1853 pairings) input.
## 26496 paired-reads (in 2689 unique pairings) successfully merged out of 31199 (in 5866 pairings) input.
## 30449 paired-reads (in 1202 unique pairings) successfully merged out of 31272 (in 1473 pairings) input.
# Evaluate the output
typeof(merged_ASVs)
## [1] "list"
length(merged_ASVs)
## [1] 10
names(merged_ASVs)
## [1] "ERR11588428_R1_filtered.fastq.gz" "ERR11588429_R1_filtered.fastq.gz"
## [3] "ERR11588430_R1_filtered.fastq.gz" "ERR11588431_R1_filtered.fastq.gz"
## [5] "ERR11588432_R1_filtered.fastq.gz" "ERR11588433_R1_filtered.fastq.gz"
## [7] "ERR11588434_R1_filtered.fastq.gz" "ERR11588435_R1_filtered.fastq.gz"
## [9] "ERR11588436_R1_filtered.fastq.gz" "ERR11588437_R1_filtered.fastq.gz"
# Inspect the merger data.frame from the 20210602-MA-ABB1P
head(merged_ASVs[[3]])
## sequence
## 1 GACTACCGGGGTATCTAATCCTGTTTGCTCCCCACGCTTTCGCGCCTCAGTGTCAGTATCTGTCCAGGTAGCCGCCTTCGCCACTGGTGTTCCTTCCGATCTCTACGCATTTCACCGCTACACCGGAAATTCCACTACCCTCTACAGTACTCTAGTACATCAGTATAAGTTGCACCTCCCAGGTTAAGCCCAGGTCTTTCACAACTCACTTAATGCACCACCTACGCGCCCTTTACGCCCAGTAACTCCGATTAACGCTTGCACCCTCTGTATTACCGCGGCTGCTGGCACAGAGTTAGCCGGTGCTTATTCTGTTGGTACAATCAAATGTATCATCTCTTAAACTATACATCTTTTCCCCAACCTAAAGTGCTTTACAACCCGAAGGCCTTCTTCACACACGCGGTATTGCTGGATCAGGGTTGCCCCCATTGTCCAATATTCCCCACTGCTGCCCCCCGTAGG
## 2 GACTACCGGGGTATCTAATCCTGTTTGCTCCCCACGCTTTCGCGCCTCAGTGTCAGGTGTAGGTTAGAAAACCGCCTTCGCCACCGGTGTTCTTCCACATATCTACGCATTTCACCGCTACATGTGGAATTCCGTTTTCCCCACCTATCCTCTAGATTAGAAGTTTCAGATGCCGCTCCGAAGTTGAGCCCCGGAATTTCACATCTGACTTTCCAAACCACCTACGCGCCCTTTACGCCCAATAAATCCGATTAATGCTTGCACCCTCCGTATTACCGCAGCTGCTGGCACGGAGTTAGCCGGTGCTTCTTTACCTGGTACCCTCAAATTAGCGGATTATTCACCCGCTTTTCTTGTTTCCAAGCGAAAGAGCTTTACAACCCGAAGGCCTTCTTCGCTCACACGGCGTCGCTTCGTCAGGCTTGCGCCCATTGCGAAAGATCCTCGACTGCAGCCCCCCGTAGG
## 3 GACTACCGGGGTATCTAATCCCGTTTGCTACCCTAGCTTTCGCGCCTCAGCGTCAGGAGAGGTCCAGCGACGCGCTTTCGCCACCGGCGTTCCTACCAATATCAACGCATTTCACCGCTCCACTGGTAGTTCCCGTCGCCCCTACCTCCCTCTAGCCCGCCAGTATCCAGGGCAGTCTTCCGGTTGAGCCGAAAGATTTCACCCTGAACTTAACGAACTGCCTACGCGCCCTTTAAGCCCAGTGATTCCGAACAACGTTCGCACGGTTCGTCTTACCGCGGCTGCTGGCACGAACTTAGCCCGTGCTTCCTCCAGGGATAGGTCAGACCTTGCGGCTTTCCTCCCCCTCGACAGTGGTTTACAACCCGAGGGCCTTCATCCCACACGCGGCGTCGCTCGGTCAAGCTTGCGCTCATTGCCGAAGATCCTCGACTGCAGCCCCCCGTAGG
## 4 GACTACCGGGGTATCTAATCCCGTTTGCTCCCCTGGCTTTCGCGCCTCAGCGTCAGTGTCAGCCCAGCAACCCGTCTTCACCTCAGGTGTTCCTCTTGATATCTACGCATTTCACCGCTACACCAAGAATTCCGATTGCCCCTTCTGCACTCTAGCGCGACAGTATCACTTGGCCGTTCTGGGTTAAGCCCAGAGATTTCACAAGTGACTTGTCATGCCGCCTACGCGCCCTTTACGCCCAGTAAATCCGAACAACGCTTGGTCCCTACGTATTACCGCGGCTGCTGGCACGTAGTTAGCCGGACCTTATAAATAGTACCGTCATTTATTCTTCCTATTCTTTCGAAGTTTACATACCGAAATACTTCATCCTTCACGCGGCGTTGCTGGGTCAGGGTTTCCCCCATTGCCCAAAATTCCCGACTGCTGCCCCCCGTAGG
## 5 GACTACCGGGGTATCTAATCCTGTTTGCTCCCCACGCTTTCGCGCCTCAGCGTCAGTTGCGAGCCAGAAAGCCGCCTTCGCCTCTGGTGTTCTTCCTAATATCTACGAATTTCACCTCTACACTAGGAATTCCACTTTCCTCTCTCGCACTCTAGCATTCCAGTATGAAACGCACCTCCCGGGTTAAGCCCGGGGCTTTCACGCCTCACTTAAAATACCGCCTACGCGCCCTTTACGCCCAGTCATTCCGAACAACGCTAGCCCCCTCCGTCTTACCGCGGCTGCTGGCACGGAGTTTGCCGGGGCTTCTTCTCCTGCTACCGTCATTATCTTCACAGGTGAAAGAACTTTACAACCCTAAGGCCTTCTTCATTCACGCGGCATTGCTGGATCAGGGTTTCCCCCATTGTCCAATATTCCCCACTGCTGCCCCCCGTAGG
## 6 GACTACCGGGGTATCTAATCCTGTTTGATCCCCACGCTTTCGCGCCTCAGCGTCAGTATTGGTCCAGGAAGCCGCCTTCGCCACTGGTGTTCCTCCGGATATCTACGCATTTCACCGCTACACCCGGAATTCCGCTTCCCTCTACCATACTCTAGCCAGGCAGTATCGAATGCAATTCCCAGGTTGAGCCCGGGGCTTTCACACCCGACTTACCAAACCGCCTACGCGCCCTTTACGCCCAGTAATTCCGATTAACGCTCGCACCCTCCGTATTACCGCGGCTGCTGGCACGGAGTTAGCCGGTGCTTCTTCTGTAAGTAACGTCAAGACCGAGTGATATTAGCACTCGGCTTTTCTTCCCTACTGAAAGTGCTTTACAACCCGCAGGCCTTCTTCACACACGCGGCATCGCTGGATCAGGCTTGCGCCCATTGTCCAATATTCCCCACTGCTGCCCCCCGTAGG
## abundance forward reverse nmatch nmismatch nindel prefer accept
## 1 100 57 100 35 0 0 2 TRUE
## 2 97 67 53 35 0 0 2 TRUE
## 3 97 88 69 51 0 0 2 TRUE
## 4 96 312 557 60 0 0 1 TRUE
## 5 93 62 58 60 0 0 2 TRUE
## 6 93 28 66 35 0 0 1 TRUE
# Create the ASV Count Table
raw_ASV_table <- makeSequenceTable(merged_ASVs)
# Write out the file to data/01_DADA2
# Check the type and dimensions of the data
dim(raw_ASV_table)
## [1] 10 15863
class(raw_ASV_table)
## [1] "matrix" "array"
typeof(raw_ASV_table)
## [1] "integer"
# Inspect the distribution of sequence lengths of all ASVs in dataset
table(nchar(getSequences(raw_ASV_table)))
##
## 256 271 272 274 275 290 291 292 294 295 296 297 298 299 372 405
## 1 2 10 1 5 14 101 15 3 18 6 12 37 2 2 1
## 409 411 412 416 417 418 419 422 423 425 428 429 432 433 434 435
## 1 2 3 1 9 1 3 7 4 45 2 62 23 17 44 117
## 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451
## 3 1 72 28 1557 1023 369 180 54 1574 268 254 12 477 7 153
## 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467
## 75 140 73 225 24 278 52 113 439 47 58 104 343 5016 2153 95
## 468 469 476 477 479 480 481 486
## 4 4 2 7 3 3 1 1
# Inspect the distribution of sequence lengths of all ASVs in dataset
# AFTER TRIM
data.frame(Seq_Length = nchar(getSequences(raw_ASV_table))) %>%
ggplot(aes(x = Seq_Length )) +
geom_histogram() +
labs(title = "Raw distribution of ASV length")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
###################################################
###################################################
# TRIM THE ASVS
# Let's trim the ASVs to only be the right size. The most common size of merged ASV is 465/466, which matches the range of V3-V4 region of 450 to 600 base pairs.
# We will allow for a few
raw_ASV_table_trimmed <- raw_ASV_table[,nchar(colnames(raw_ASV_table)) %in% 465:466]
# Inspect the distribution of sequence lengths of all ASVs in dataset
table(nchar(getSequences(raw_ASV_table_trimmed)))
##
## 465 466
## 5016 2153
# What proportion is left of the sequences?
sum(raw_ASV_table_trimmed)/sum(raw_ASV_table)
## [1] 0.3977547
# Inspect the distribution of sequence lengths of all ASVs in dataset
# AFTER TRIM
data.frame(Seq_Length = nchar(getSequences(raw_ASV_table_trimmed))) %>%
ggplot(aes(x = Seq_Length )) +
geom_histogram() +
labs(title = "Trimmed distribution of ASV length")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Note the peak at 465 is ABOVE 3000
# Remove the chimeras in the raw ASV table
noChimeras_ASV_table <- removeBimeraDenovo(raw_ASV_table_trimmed,
method="consensus",
multithread=TRUE, verbose=TRUE)
## Identified 5024 bimeras out of 7169 input sequences.
# Check the dimensions
dim(noChimeras_ASV_table)
## [1] 10 2145
# What proportion is left of the sequences?
sum(noChimeras_ASV_table)/sum(raw_ASV_table_trimmed)
## [1] 0.5110413
sum(noChimeras_ASV_table)/sum(raw_ASV_table)
## [1] 0.2032691
# Plot it
data.frame(Seq_Length_NoChim = nchar(getSequences(noChimeras_ASV_table))) %>%
ggplot(aes(x = Seq_Length_NoChim )) +
geom_histogram()+
labs(title = "Trimmed + Chimera Removal distribution of ASV length")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Note the difference in the peak at 465, which is now BELOW 3000
Here, we will look at the number of reads that were lost in the filtering, denoising, merging, and chimera removal.
# A little function to identify number seqs
getN <- function(x) sum(getUniques(x))
# Make the table to track the seqs
track <- cbind(filtered_reads,
sapply(dada_forward, getN),
sapply(dada_reverse, getN),
sapply(merged_ASVs, getN),
rowSums(noChimeras_ASV_table))
head(track)
## reads.in reads.out
## ERR11588428_1.fastq.gz 96845 58385 53663 54456 40228 11916
## ERR11588429_1.fastq.gz 101519 58720 53193 53731 37668 12022
## ERR11588430_1.fastq.gz 95676 57401 52549 52541 37461 11725
## ERR11588431_1.fastq.gz 90880 55342 54927 54543 53845 3005
## ERR11588432_1.fastq.gz 85608 53582 53088 52881 52115 5515
## ERR11588433_1.fastq.gz 73835 45215 44592 45039 43966 2956
# Update column names to be more informative (most are missing at the moment!)
colnames(track) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", "nochim")
rownames(track) <- samples
# Generate a dataframe to track the reads through our DADA2 pipeline
track_counts_df <-
track %>%
# make it a dataframe
as.data.frame() %>%
rownames_to_column(var = "names") %>%
mutate(perc_reads_retained = 100 * nochim / input)
# Visualize it in table format
DT::datatable(track_counts_df)
# Plot it!
track_counts_df %>%
pivot_longer(input:nochim, names_to = "read_type", values_to = "num_reads") %>%
mutate(read_type = fct_relevel(read_type,
"input", "filtered", "denoisedF", "denoisedR", "merged", "nochim")) %>%
ggplot(aes(x = read_type, y = num_reads, fill = read_type)) +
geom_line(aes(group = names), color = "grey") +
geom_point(shape = 21, size = 3, alpha = 0.8) +
scale_fill_brewer(palette = "Spectral") +
labs(x = "Filtering Step", y = "Number of Sequences") +
theme_bw()
Here, we will use the silva database version 138!
# Classify the ASVs against a reference set using the RDP Naive Bayesian Classifier described by Wang et al., (2007) in AEM
taxa_train <-
assignTaxonomy(noChimeras_ASV_table,
"/workdir/in_class_data/taxonomy/silva_nr99_v138.1_train_set.fa.gz",
multithread=TRUE)
# Add the genus/species information
taxa_addSpecies <-
addSpecies(taxa_train,
"/workdir/in_class_data/taxonomy/silva_species_assignment_v138.1.fa.gz")
# Inspect the taxonomy
taxa_print <- taxa_addSpecies
# Removing sequence rownames for display only
rownames(taxa_print) <- NULL
#View(taxa_print)
Below, we will prepare the following:
ASV_fastas: A fasta file that we can use to build a
tree for phylogenetic analyses (e.g. phylogenetic alpha diversity
metrics or UNIFRAC dissimilarty).########### 2. COUNT TABLE ###############
############## Modify the ASV names and then save a fasta file! ##############
# Give headers more manageable names
# First pull the ASV sequences
asv_seqs <- colnames(noChimeras_ASV_table)
asv_seqs[1:5]
## [1] "GACTACCGGGGTATCTAATCCCGTTTGCTCCCCTAGCTTTCGCGCCTCAGTGTCAGTGTCGGTCTAGGAAGCCGCCTTCGCCACCGGTGTTCCTCCGGATATCTACGCATTTCACCGCTACACCCGGAATTCCGCTTCCCTCTCCCGAACTCTAGCCCGCCAGTTTCCCGTGCCATTCCTCAGTTGAGCCGAGGGCTTTCACACGGGACTTAGCGGACCACCTACGCGCCCTTTACGCCCAGTAATTCCGAACAACGCTTGCCACCTCTGTATTACCGCGGCTGCTGGCACAGAGTTAGCCGTGGCTTCCTCCACCGGTACAGTCAATAGCCCGGCCTGTTCAGCCGTTCTACATTCGTCCCGGTCGACAGGGGTTTACGATCCGAAGACCTTCATCCCCCACGCGGCGTTGCTTCGTCAGGGTTTCCCCCATTGCGAAAAATTCCCCACTGCAGCCCCCCGTAGG"
## [2] "CCTACGGGGGGCTGCAGTGGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATACGTTAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGACTGACTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTAATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCCAGTAGTC"
## [3] "CCTACGGGGGGCTGCAGTGGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATACGTTAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGACTGACTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTAATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCCGGTAGTC"
## [4] "CCTACGGGGGGCTGCAGTGGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATACGTTAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGACTGACTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTAATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCCTGTAGTC"
## [5] "GACTACTGGGGTATCTAATCCCGTTTGCTCCCCTAGCTTTCGCGCCTCAGTGTCAGTGTCGGTCTAGGAAGCCGCCTTCGCCACCGGTGTTCCTCCGGATATCTACGCATTTCACCGCTACACCCGGAATTCCGCTTCCCTCTCCCGAACTCTAGCCCGCCAGTTTCCCGTGCCATTCCTCAGTTGAGCCGAGGGCTTTCACACGGGACTTAGCGGACCACCTACGCGCCCTTTACGCCCAGTAATTCCGAACAACGCTTGCCACCTCTGTATTACCGCGGCTGCTGGCACAGAGTTAGCCGTGGCTTCCTCCACCGGTACAGTCAATAGCCCGGCCTGTTCAGCCGTTCTACATTCGTCCCGGTCGACAGGGGTTTACGATCCGAAGACCTTCATCCCCCACGCGGCGTTGCTTCGTCAGGGTTTCCCCCATTGCGAAAAATTCCCCACTGCAGCCCCCCGTAGG"
# make headers for our ASV seq fasta file, which will be our asv names
asv_headers <- vector(dim(noChimeras_ASV_table)[2], mode = "character")
asv_headers[1:5]
## [1] "" "" "" "" ""
# loop through vector and fill it in with ASV names
for (i in 1:dim(noChimeras_ASV_table)[2]) {
asv_headers[i] <- paste(">ASV", i, sep = "_")
}
# intitution check
asv_headers[1:5]
## [1] ">ASV_1" ">ASV_2" ">ASV_3" ">ASV_4" ">ASV_5"
##### Rename ASVs in table then write out our ASV fasta file!
#View(noChimeras_ASV_table)
asv_tab <- t(noChimeras_ASV_table)
#View(asv_tab)
## Rename our asvs!
row.names(asv_tab) <- sub(">", "", asv_headers)
#View(asv_tab)
# Inspect the taxonomy table
#View(taxa_addSpecies)
##### Prepare tax table
# Add the ASV sequences from the rownames to a column
new_tax_tab <-
taxa_addSpecies%>%
as.data.frame() %>%
rownames_to_column(var = "ASVseqs")
head(new_tax_tab)
## ASVseqs
## 1 GACTACCGGGGTATCTAATCCCGTTTGCTCCCCTAGCTTTCGCGCCTCAGTGTCAGTGTCGGTCTAGGAAGCCGCCTTCGCCACCGGTGTTCCTCCGGATATCTACGCATTTCACCGCTACACCCGGAATTCCGCTTCCCTCTCCCGAACTCTAGCCCGCCAGTTTCCCGTGCCATTCCTCAGTTGAGCCGAGGGCTTTCACACGGGACTTAGCGGACCACCTACGCGCCCTTTACGCCCAGTAATTCCGAACAACGCTTGCCACCTCTGTATTACCGCGGCTGCTGGCACAGAGTTAGCCGTGGCTTCCTCCACCGGTACAGTCAATAGCCCGGCCTGTTCAGCCGTTCTACATTCGTCCCGGTCGACAGGGGTTTACGATCCGAAGACCTTCATCCCCCACGCGGCGTTGCTTCGTCAGGGTTTCCCCCATTGCGAAAAATTCCCCACTGCAGCCCCCCGTAGG
## 2 CCTACGGGGGGCTGCAGTGGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATACGTTAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGACTGACTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTAATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCCAGTAGTC
## 3 CCTACGGGGGGCTGCAGTGGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATACGTTAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGACTGACTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTAATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCCGGTAGTC
## 4 CCTACGGGGGGCTGCAGTGGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATACGTTAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGACTGACTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTAATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCCTGTAGTC
## 5 GACTACTGGGGTATCTAATCCCGTTTGCTCCCCTAGCTTTCGCGCCTCAGTGTCAGTGTCGGTCTAGGAAGCCGCCTTCGCCACCGGTGTTCCTCCGGATATCTACGCATTTCACCGCTACACCCGGAATTCCGCTTCCCTCTCCCGAACTCTAGCCCGCCAGTTTCCCGTGCCATTCCTCAGTTGAGCCGAGGGCTTTCACACGGGACTTAGCGGACCACCTACGCGCCCTTTACGCCCAGTAATTCCGAACAACGCTTGCCACCTCTGTATTACCGCGGCTGCTGGCACAGAGTTAGCCGTGGCTTCCTCCACCGGTACAGTCAATAGCCCGGCCTGTTCAGCCGTTCTACATTCGTCCCGGTCGACAGGGGTTTACGATCCGAAGACCTTCATCCCCCACGCGGCGTTGCTTCGTCAGGGTTTCCCCCATTGCGAAAAATTCCCCACTGCAGCCCCCCGTAGG
## 6 CCTACGGGGGGCAGCAGTGGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATACGTTAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGACTGACTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTAATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCCAGTAGTC
## Kingdom Phylum Class Order Family
## 1 <NA> <NA> <NA> <NA> <NA>
## 2 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales Pseudomonadaceae
## 3 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales Pseudomonadaceae
## 4 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales Pseudomonadaceae
## 5 <NA> <NA> <NA> <NA> <NA>
## 6 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales Pseudomonadaceae
## Genus Species
## 1 <NA> <NA>
## 2 Pseudomonas <NA>
## 3 Pseudomonas <NA>
## 4 Pseudomonas <NA>
## 5 <NA> <NA>
## 6 Pseudomonas <NA>
# intution check
stopifnot(new_tax_tab$ASVseqs == colnames(noChimeras_ASV_table))
# Now let's add the ASV names
rownames(new_tax_tab) <- rownames(asv_tab)
head(new_tax_tab)
## ASVseqs
## ASV_1 GACTACCGGGGTATCTAATCCCGTTTGCTCCCCTAGCTTTCGCGCCTCAGTGTCAGTGTCGGTCTAGGAAGCCGCCTTCGCCACCGGTGTTCCTCCGGATATCTACGCATTTCACCGCTACACCCGGAATTCCGCTTCCCTCTCCCGAACTCTAGCCCGCCAGTTTCCCGTGCCATTCCTCAGTTGAGCCGAGGGCTTTCACACGGGACTTAGCGGACCACCTACGCGCCCTTTACGCCCAGTAATTCCGAACAACGCTTGCCACCTCTGTATTACCGCGGCTGCTGGCACAGAGTTAGCCGTGGCTTCCTCCACCGGTACAGTCAATAGCCCGGCCTGTTCAGCCGTTCTACATTCGTCCCGGTCGACAGGGGTTTACGATCCGAAGACCTTCATCCCCCACGCGGCGTTGCTTCGTCAGGGTTTCCCCCATTGCGAAAAATTCCCCACTGCAGCCCCCCGTAGG
## ASV_2 CCTACGGGGGGCTGCAGTGGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATACGTTAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGACTGACTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTAATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCCAGTAGTC
## ASV_3 CCTACGGGGGGCTGCAGTGGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATACGTTAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGACTGACTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTAATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCCGGTAGTC
## ASV_4 CCTACGGGGGGCTGCAGTGGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATACGTTAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGACTGACTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTAATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCCTGTAGTC
## ASV_5 GACTACTGGGGTATCTAATCCCGTTTGCTCCCCTAGCTTTCGCGCCTCAGTGTCAGTGTCGGTCTAGGAAGCCGCCTTCGCCACCGGTGTTCCTCCGGATATCTACGCATTTCACCGCTACACCCGGAATTCCGCTTCCCTCTCCCGAACTCTAGCCCGCCAGTTTCCCGTGCCATTCCTCAGTTGAGCCGAGGGCTTTCACACGGGACTTAGCGGACCACCTACGCGCCCTTTACGCCCAGTAATTCCGAACAACGCTTGCCACCTCTGTATTACCGCGGCTGCTGGCACAGAGTTAGCCGTGGCTTCCTCCACCGGTACAGTCAATAGCCCGGCCTGTTCAGCCGTTCTACATTCGTCCCGGTCGACAGGGGTTTACGATCCGAAGACCTTCATCCCCCACGCGGCGTTGCTTCGTCAGGGTTTCCCCCATTGCGAAAAATTCCCCACTGCAGCCCCCCGTAGG
## ASV_6 CCTACGGGGGGCAGCAGTGGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATACGTTAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGACTGACTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTAATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCCAGTAGTC
## Kingdom Phylum Class Order
## ASV_1 <NA> <NA> <NA> <NA>
## ASV_2 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales
## ASV_3 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales
## ASV_4 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales
## ASV_5 <NA> <NA> <NA> <NA>
## ASV_6 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales
## Family Genus Species
## ASV_1 <NA> <NA> <NA>
## ASV_2 Pseudomonadaceae Pseudomonas <NA>
## ASV_3 Pseudomonadaceae Pseudomonas <NA>
## ASV_4 Pseudomonadaceae Pseudomonas <NA>
## ASV_5 <NA> <NA> <NA>
## ASV_6 Pseudomonadaceae Pseudomonas <NA>
### Final prep of tax table. Add new column with ASV names
asv_tax <-
new_tax_tab %>%
# add rownames from count table for phyloseq handoff
mutate(ASV = rownames(asv_tab)) %>%
# Resort the columns with select
dplyr::select(Kingdom, Phylum, Class, Order, Family, Genus, Species, ASV, ASVseqs)
head(asv_tax)
## Kingdom Phylum Class Order
## ASV_1 <NA> <NA> <NA> <NA>
## ASV_2 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales
## ASV_3 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales
## ASV_4 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales
## ASV_5 <NA> <NA> <NA> <NA>
## ASV_6 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales
## Family Genus Species ASV
## ASV_1 <NA> <NA> <NA> ASV_1
## ASV_2 Pseudomonadaceae Pseudomonas <NA> ASV_2
## ASV_3 Pseudomonadaceae Pseudomonas <NA> ASV_3
## ASV_4 Pseudomonadaceae Pseudomonas <NA> ASV_4
## ASV_5 <NA> <NA> <NA> ASV_5
## ASV_6 Pseudomonadaceae Pseudomonas <NA> ASV_6
## ASVseqs
## ASV_1 GACTACCGGGGTATCTAATCCCGTTTGCTCCCCTAGCTTTCGCGCCTCAGTGTCAGTGTCGGTCTAGGAAGCCGCCTTCGCCACCGGTGTTCCTCCGGATATCTACGCATTTCACCGCTACACCCGGAATTCCGCTTCCCTCTCCCGAACTCTAGCCCGCCAGTTTCCCGTGCCATTCCTCAGTTGAGCCGAGGGCTTTCACACGGGACTTAGCGGACCACCTACGCGCCCTTTACGCCCAGTAATTCCGAACAACGCTTGCCACCTCTGTATTACCGCGGCTGCTGGCACAGAGTTAGCCGTGGCTTCCTCCACCGGTACAGTCAATAGCCCGGCCTGTTCAGCCGTTCTACATTCGTCCCGGTCGACAGGGGTTTACGATCCGAAGACCTTCATCCCCCACGCGGCGTTGCTTCGTCAGGGTTTCCCCCATTGCGAAAAATTCCCCACTGCAGCCCCCCGTAGG
## ASV_2 CCTACGGGGGGCTGCAGTGGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATACGTTAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGACTGACTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTAATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCCAGTAGTC
## ASV_3 CCTACGGGGGGCTGCAGTGGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATACGTTAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGACTGACTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTAATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCCGGTAGTC
## ASV_4 CCTACGGGGGGCTGCAGTGGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATACGTTAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGACTGACTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTAATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCCTGTAGTC
## ASV_5 GACTACTGGGGTATCTAATCCCGTTTGCTCCCCTAGCTTTCGCGCCTCAGTGTCAGTGTCGGTCTAGGAAGCCGCCTTCGCCACCGGTGTTCCTCCGGATATCTACGCATTTCACCGCTACACCCGGAATTCCGCTTCCCTCTCCCGAACTCTAGCCCGCCAGTTTCCCGTGCCATTCCTCAGTTGAGCCGAGGGCTTTCACACGGGACTTAGCGGACCACCTACGCGCCCTTTACGCCCAGTAATTCCGAACAACGCTTGCCACCTCTGTATTACCGCGGCTGCTGGCACAGAGTTAGCCGTGGCTTCCTCCACCGGTACAGTCAATAGCCCGGCCTGTTCAGCCGTTCTACATTCGTCCCGGTCGACAGGGGTTTACGATCCGAAGACCTTCATCCCCCACGCGGCGTTGCTTCGTCAGGGTTTCCCCCATTGCGAAAAATTCCCCACTGCAGCCCCCCGTAGG
## ASV_6 CCTACGGGGGGCAGCAGTGGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATACGTTAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGACTGACTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTAATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCCAGTAGTC
# Intution check
stopifnot(asv_tax$ASV == rownames(asv_tax), rownames(asv_tax) == rownames(asv_tab))
01_DADA2 filesNow, we will write the files! We will write the following to the
data/01_DADA2/ folder. We will save both as files that
could be submitted as supplements AND as .RData objects for easy loading
into the next steps into R.:
ASV_counts.tsv: ASV count table that has ASV names that
are re-written and shortened headers like ASV_1, ASV_2, etc, which will
match the names in our fasta file below. This will also be saved as
data/01_DADA2/ASV_counts.RData.`ASV_counts_withSeqNames.tsv: This is generated with the
data object in this file known as noChimeras_ASV_table. ASV
headers include the entire ASV sequence ~450bps. In addition,
we will save this as a .RData object as
data/01_DADA2/noChimeras_ASV_table.RData as we will use
this data in analysis/02_Taxonomic_Assignment.Rmd to assign
the taxonomy from the sequence headers.ASVs.fasta: A fasta file output of the ASV names from
ASV_counts.tsv and the sequences from the ASVs in
ASV_counts_withSeqNames.tsv. A fasta file that we can use
to build a tree for phylogenetic analyses (e.g. phylogenetic alpha
diversity metrics or UNIFRAC dissimilarty).ASVs.fasta in
data/02_TaxAss_FreshTrain/ to be used for the taxonomy
classification in the next step in the workflow.track_read_counts.RData: To track how many reads we
lost throughout our workflow that could be used and plotted later. We
will add this to the metadata in
analysis/02_Taxonomic_Assignment.Rmd.# FIRST, we will save our output as regular files, which will be useful later on.
# Save to regular .tsv file
# Write BOTH the modified and unmodified ASV tables to a file!
# Write count table with ASV numbered names (e.g. ASV_1, ASV_2, etc)
write.table(asv_tab, "data/01_DADA2/ASV_counts.tsv", sep = "\t", quote = FALSE, col.names = NA)
# Write count table with ASV sequence names
write.table(noChimeras_ASV_table, "data/01_DADA2/ASV_counts_withSeqNames.tsv", sep = "\t", quote = FALSE, col.names = NA)
# Write out the fasta file for reference later on for what seq matches what ASV
asv_fasta <- c(rbind(asv_headers, asv_seqs))
# Save to a file!
write(asv_fasta, "data/01_DADA2/ASVs.fasta")
# SECOND, let's save the taxonomy tables
# Write the table
write.table(asv_tax, "data/01_DADA2/ASV_taxonomy.tsv", sep = "\t", quote = FALSE, col.names = NA)
# THIRD, let's save to a RData object
# Each of these files will be used in the analysis/02_Taxonomic_Assignment
# RData objects are for easy loading :)
save(noChimeras_ASV_table, file = "data/01_DADA2/noChimeras_ASV_table.RData")
save(asv_tab, file = "data/01_DADA2/ASV_counts.RData")
# And save the track_counts_df a R object, which we will merge with metadata information in the next step of the analysis in nalysis/02_Taxonomic_Assignment.
save(track_counts_df, file = "data/01_DADA2/track_read_counts.RData")
# Ensure reproducibility
devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 4.3.2 (2023-10-31)
## os Rocky Linux 9.0 (Blue Onyx)
## system x86_64, linux-gnu
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/New_York
## date 2024-03-18
## pandoc 3.1.1 @ /usr/lib/rstudio-server/bin/quarto/bin/tools/ (via rmarkdown)
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## ! package * version date (UTC) lib source
## P abind 1.4-5 2016-07-21 [?] CRAN (R 4.3.2)
## P ade4 1.7-22 2023-02-06 [?] CRAN (R 4.3.2)
## P ape 5.7-1 2023-03-13 [?] CRAN (R 4.3.2)
## P Biobase 2.62.0 2023-10-24 [?] Bioconductor
## P BiocGenerics 0.48.1 2023-11-01 [?] Bioconductor
## P BiocManager * 1.30.22 2023-08-08 [?] CRAN (R 4.3.2)
## P BiocParallel 1.36.0 2023-10-24 [?] Bioconductor
## P biomformat 1.30.0 2023-10-24 [?] Bioconductor
## P Biostrings 2.70.1 2023-10-25 [?] Bioconductor
## P bitops 1.0-7 2021-04-24 [?] CRAN (R 4.3.2)
## P bslib 0.5.1 2023-08-11 [?] CRAN (R 4.3.2)
## P cachem 1.0.8 2023-05-01 [?] CRAN (R 4.3.2)
## P callr 3.7.3 2022-11-02 [?] CRAN (R 4.3.2)
## P cli 3.6.1 2023-03-23 [?] CRAN (R 4.3.2)
## P cluster 2.1.4 2022-08-22 [?] CRAN (R 4.3.2)
## P codetools 0.2-19 2023-02-01 [?] CRAN (R 4.3.2)
## P colorspace 2.1-0 2023-01-23 [?] CRAN (R 4.3.2)
## P crayon 1.5.2 2022-09-29 [?] CRAN (R 4.3.2)
## P crosstalk 1.2.0 2021-11-04 [?] CRAN (R 4.3.2)
## P dada2 * 1.30.0 2023-10-24 [?] Bioconductor
## P data.table 1.14.8 2023-02-17 [?] CRAN (R 4.3.2)
## P DelayedArray 0.28.0 2023-10-24 [?] Bioconductor
## P deldir 1.0-9 2023-05-17 [?] CRAN (R 4.3.2)
## P devtools * 2.4.4 2022-07-20 [?] CRAN (R 4.2.1)
## P digest 0.6.33 2023-07-07 [?] CRAN (R 4.3.2)
## P dplyr * 1.1.3 2023-09-03 [?] CRAN (R 4.3.2)
## P DT * 0.32 2024-02-19 [?] CRAN (R 4.3.2)
## P ellipsis 0.3.2 2021-04-29 [?] CRAN (R 4.3.2)
## P evaluate 0.23 2023-11-01 [?] CRAN (R 4.3.2)
## P fansi 1.0.5 2023-10-08 [?] CRAN (R 4.3.2)
## P farver 2.1.1 2022-07-06 [?] CRAN (R 4.3.2)
## P fastmap 1.1.1 2023-02-24 [?] CRAN (R 4.3.2)
## P forcats * 1.0.0 2023-01-29 [?] CRAN (R 4.3.2)
## P foreach 1.5.2 2022-02-02 [?] CRAN (R 4.3.2)
## P fs 1.6.3 2023-07-20 [?] CRAN (R 4.3.2)
## P generics 0.1.3 2022-07-05 [?] CRAN (R 4.3.2)
## P GenomeInfoDb 1.38.0 2023-10-24 [?] Bioconductor
## P GenomeInfoDbData 1.2.11 2023-11-07 [?] Bioconductor
## P GenomicAlignments 1.38.0 2023-10-24 [?] Bioconductor
## P GenomicRanges 1.54.1 2023-10-29 [?] Bioconductor
## P ggplot2 * 3.5.0 2024-02-23 [?] CRAN (R 4.3.2)
## P glue 1.6.2 2022-02-24 [?] CRAN (R 4.3.2)
## P gtable 0.3.4 2023-08-21 [?] CRAN (R 4.3.2)
## P highr 0.10 2022-12-22 [?] CRAN (R 4.3.2)
## P hms 1.1.3 2023-03-21 [?] CRAN (R 4.3.2)
## P htmltools 0.5.7 2023-11-03 [?] CRAN (R 4.3.2)
## P htmlwidgets 1.6.2 2023-03-17 [?] CRAN (R 4.3.2)
## P httpuv 1.6.12 2023-10-23 [?] CRAN (R 4.3.2)
## P hwriter 1.3.2.1 2022-04-08 [?] CRAN (R 4.3.2)
## P igraph 1.5.1 2023-08-10 [?] CRAN (R 4.3.2)
## P iNEXT * 3.0.0 2022-08-29 [?] CRAN (R 4.3.2)
## P interp 1.1-6 2024-01-26 [?] CRAN (R 4.3.2)
## P IRanges 2.36.0 2023-10-24 [?] Bioconductor
## P iterators 1.0.14 2022-02-05 [?] CRAN (R 4.3.2)
## P jpeg 0.1-10 2022-11-29 [?] CRAN (R 4.3.2)
## P jquerylib 0.1.4 2021-04-26 [?] CRAN (R 4.3.2)
## P jsonlite 1.8.7 2023-06-29 [?] CRAN (R 4.3.2)
## P knitr 1.45 2023-10-30 [?] CRAN (R 4.3.2)
## P labeling 0.4.3 2023-08-29 [?] CRAN (R 4.3.2)
## P later 1.3.1 2023-05-02 [?] CRAN (R 4.3.2)
## P lattice * 0.21-9 2023-10-01 [?] CRAN (R 4.3.2)
## P latticeExtra 0.6-30 2022-07-04 [?] CRAN (R 4.3.2)
## P lifecycle 1.0.3 2022-10-07 [?] CRAN (R 4.3.2)
## P lubridate * 1.9.3 2023-09-27 [?] CRAN (R 4.3.2)
## P magrittr 2.0.3 2022-03-30 [?] CRAN (R 4.3.2)
## P MASS 7.3-60 2023-05-04 [?] CRAN (R 4.3.2)
## P Matrix 1.6-1.1 2023-09-18 [?] CRAN (R 4.3.2)
## P MatrixGenerics 1.14.0 2023-10-24 [?] Bioconductor
## P matrixStats 1.1.0 2023-11-07 [?] CRAN (R 4.3.2)
## P memoise 2.0.1 2021-11-26 [?] CRAN (R 4.3.2)
## P mgcv 1.9-0 2023-07-11 [?] CRAN (R 4.3.2)
## P mime 0.12 2021-09-28 [?] CRAN (R 4.3.2)
## P miniUI 0.1.1.1 2018-05-18 [?] CRAN (R 4.3.2)
## P multtest 2.58.0 2023-10-24 [?] Bioconductor
## P munsell 0.5.0 2018-06-12 [?] CRAN (R 4.3.2)
## P nlme 3.1-163 2023-08-09 [?] CRAN (R 4.3.2)
## P pacman 0.5.1 2019-03-11 [?] CRAN (R 4.3.2)
## P patchwork * 1.2.0.9000 2024-03-11 [?] Github (thomasp85/patchwork@d943757)
## P permute * 0.9-7 2022-01-27 [?] CRAN (R 4.3.2)
## P phyloseq * 1.46.0 2023-10-24 [?] Bioconductor
## P pillar 1.9.0 2023-03-22 [?] CRAN (R 4.3.2)
## P pkgbuild 1.4.2 2023-06-26 [?] CRAN (R 4.3.2)
## P pkgconfig 2.0.3 2019-09-22 [?] CRAN (R 4.3.2)
## P pkgload 1.3.3 2023-09-22 [?] CRAN (R 4.3.2)
## P plyr 1.8.9 2023-10-02 [?] CRAN (R 4.3.2)
## P png 0.1-8 2022-11-29 [?] CRAN (R 4.3.2)
## P prettyunits 1.2.0 2023-09-24 [?] CRAN (R 4.3.2)
## P processx 3.8.2 2023-06-30 [?] CRAN (R 4.3.2)
## P profvis 0.3.8 2023-05-02 [?] CRAN (R 4.3.2)
## P promises 1.2.1 2023-08-10 [?] CRAN (R 4.3.2)
## P ps 1.7.5 2023-04-18 [?] CRAN (R 4.3.2)
## P purrr * 1.0.2 2023-08-10 [?] CRAN (R 4.3.2)
## P R6 2.5.1 2021-08-19 [?] CRAN (R 4.3.2)
## P RColorBrewer 1.1-3 2022-04-03 [?] CRAN (R 4.3.2)
## P Rcpp * 1.0.11 2023-07-06 [?] CRAN (R 4.3.2)
## P RcppParallel 5.1.7 2023-02-27 [?] CRAN (R 4.3.2)
## P RCurl 1.98-1.13 2023-11-02 [?] CRAN (R 4.3.2)
## P readr * 2.1.5 2024-01-10 [?] CRAN (R 4.3.2)
## P remotes 2.4.2.1 2023-07-18 [?] CRAN (R 4.3.2)
## renv 1.0.5 2024-02-29 [1] CRAN (R 4.3.2)
## P reshape2 1.4.4 2020-04-09 [?] CRAN (R 4.3.2)
## P rhdf5 2.46.1 2023-11-29 [?] Bioconductor 3.18 (R 4.3.2)
## P rhdf5filters 1.14.1 2023-11-06 [?] Bioconductor
## P Rhdf5lib 1.24.2 2024-02-07 [?] Bioconductor 3.18 (R 4.3.2)
## P rlang 1.1.2 2023-11-04 [?] CRAN (R 4.3.2)
## P rmarkdown 2.25 2023-09-18 [?] CRAN (R 4.3.2)
## P Rsamtools 2.18.0 2023-10-24 [?] Bioconductor
## P rstudioapi 0.15.0 2023-07-07 [?] CRAN (R 4.3.2)
## P S4Arrays 1.2.0 2023-10-24 [?] Bioconductor
## P S4Vectors 0.40.1 2023-10-26 [?] Bioconductor
## P sass 0.4.7 2023-07-15 [?] CRAN (R 4.3.2)
## P scales 1.3.0 2023-11-28 [?] CRAN (R 4.3.2)
## P sessioninfo 1.2.2 2021-12-06 [?] CRAN (R 4.3.2)
## P shiny 1.7.5.1 2023-10-14 [?] CRAN (R 4.3.2)
## P ShortRead 1.60.0 2023-10-24 [?] Bioconductor
## P SparseArray 1.2.1 2023-11-05 [?] Bioconductor
## P stringi 1.7.12 2023-01-11 [?] CRAN (R 4.3.2)
## P stringr * 1.5.0 2022-12-02 [?] CRAN (R 4.3.2)
## P SummarizedExperiment 1.32.0 2023-10-24 [?] Bioconductor
## P survival 3.5-7 2023-08-14 [?] CRAN (R 4.3.2)
## P tibble * 3.2.1 2023-03-20 [?] CRAN (R 4.3.2)
## P tidyr * 1.3.0 2023-01-24 [?] CRAN (R 4.3.2)
## P tidyselect 1.2.0 2022-10-10 [?] CRAN (R 4.3.2)
## P tidyverse * 2.0.0 2023-02-22 [?] CRAN (R 4.3.2)
## P timechange 0.3.0 2024-01-18 [?] CRAN (R 4.3.2)
## P tzdb 0.4.0 2023-05-12 [?] CRAN (R 4.3.2)
## P urlchecker 1.0.1 2021-11-30 [?] CRAN (R 4.3.2)
## P usethis * 2.2.2 2023-07-06 [?] CRAN (R 4.3.2)
## P utf8 1.2.4 2023-10-22 [?] CRAN (R 4.3.2)
## P vctrs 0.6.4 2023-10-12 [?] CRAN (R 4.3.2)
## P vegan * 2.6-4 2022-10-11 [?] CRAN (R 4.3.2)
## P withr 2.5.2 2023-10-30 [?] CRAN (R 4.3.2)
## P xfun 0.41 2023-11-01 [?] CRAN (R 4.3.2)
## P xtable 1.8-4 2019-04-21 [?] CRAN (R 4.3.2)
## P XVector 0.42.0 2023-10-24 [?] Bioconductor
## P yaml 2.3.7 2023-01-23 [?] CRAN (R 4.3.2)
## P zlibbioc 1.48.0 2023-10-24 [?] Bioconductor
##
## [1] /local/workdir/zlr6/git_repos/biomi6300_moonmilk_amplicon_analysis/renv/library/R-4.3/x86_64-pc-linux-gnu
## [2] /home/zlr6/.cache/R/renv/sandbox/R-4.3/x86_64-pc-linux-gnu/0b9136a1
##
## P ── Loaded and on-disk path mismatch.
##
## ──────────────────────────────────────────────────────────────────────────────